Source: National Health and Nutrition Examination Survey
The National Health and Nutrition Examination Survey is a program designed to asses the health and nutritional status of adults and children in the United States. NHANES is a major program of the National Center for Health Statistics (NCHS). NCHS is part of the Centers for Disease Control and Prevention (CDC).
The survey examines a sample of 5,000 persons each year, located in counties across the country. NHANES interview includes demographic, socioeconomic, dietary, and health-related questions. Examination component consists of medical, dental, physiological measurements and laboratory tests.
NHANES 2015–2016 data source was selected since it includes LDL and Triglycerides needed for analysis
Exploratory data analysis
Coding heart disease risk based on LDL score
trigly <- trigly %>%
mutate(
RISK = case_when(
LBDLDL <= 130 ~ "low",
LBDLDL >= 190 ~ "high",
TRUE ~ "medium"
)
)
# table with risk values
table(trigly$RISK)
high low medium
57 2056 1078
Getting race and gender demographics about patients with LDL data
# joining trigly data with demographic by respondent sequence number
demo_trigly <- trigly %>%
inner_join(demo) %>%
mutate(
RIDRETH1 = case_when(
RIDRETH1 == 1 ~ "Mexican American",
RIDRETH1 == 2 ~ "Other Hispanic",
RIDRETH1 == 3 ~ "Non-Hispanic White",
RIDRETH1 == 4 ~ "Non-Hispanic Black",
RIDRETH1 == 5 ~ "Other Race"
),
RIAGENDR = case_when(
RIAGENDR == 1 ~ "Male",
RIAGENDR == 2 ~ "Female"
)
)
Joining, by = "SEQN"
# table with risk values and gender
table(demo_trigly$RIAGENDR, demo_trigly$RISK)
high low medium
Female 26 1062 538
Male 31 994 540
# table with risk values and race/ethnicity
table(demo_trigly$RIDRETH1, demo_trigly$RISK)
high low medium
Mexican American 6 370 162
Non-Hispanic Black 13 451 245
Non-Hispanic White 23 657 358
Other Hispanic 7 280 133
Other Race 8 298 180
Visualizing heart disease risk by gender
# filtering risk
demo_trigly_risk_gender <- demo_trigly %>%
filter(RISK == 'high') %>% # setting fixed value, shiny app will use input select
group_by(RIAGENDR) %>%
summarize(count = n())
colors = c("#B80095", "#3008B1")
p <- plot_ly(demo_trigly_risk_gender, labels = ~RIAGENDR, values = ~count, type = 'pie',
marker = list(colors = colors))
p <- p %>% layout(title = 'Heart disease risk by gender',
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
p
Visualizing heart disease risk by gender and race
# filtering risk
demo_trigly_gender_race <- demo_trigly %>%
filter(RISK == 'high') # setting fixed value, shiny app will use input select
p <- ggplot(data = demo_trigly_gender_race, mapping = aes(x = RIDRETH1, fill = RIAGENDR)) +
geom_bar(position = "dodge", mapping = aes(y = ..prop.., group = RIAGENDR)) +
scale_fill_manual(values = c("#B80095", "#3008B1")) +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, hjust = 1)
) +
labs(title = "Heart disease risk by gender and race",
subtitle = "Risk based on LDL level",
x = "Race and Ethnicity",
y = "Risk percentage %",
fill = "Gender")
# transition_states(RIAGENDR) +
# ease_aes('linear')
# anim_save("../shiny/www/gganimate02.gif", animate(p))
ggplotly(p)
Coding Marital status
demo_trigly <- demo_trigly %>%
mutate(
DMDMARTL = case_when(
DMDMARTL == 1 ~ "Married",
DMDMARTL == 2 ~ "Widowed",
DMDMARTL == 3 ~ "Divorced",
DMDMARTL == 4 ~ "Separated",
DMDMARTL == 5 ~ "Never married",
DMDMARTL == 6 ~ "Living with partner"
)
)
# table marital status and heart disease risk
table(demo_trigly$DMDMARTL,demo_trigly$RISK)
high low medium
Divorced 7 163 111
Living with partner 9 150 67
Married 26 806 497
Never married 6 315 147
Separated 2 45 40
Widowed 4 132 70
Visualizing heart disease risk by marital status
# filtering risk
demo_trigly_marital <- demo_trigly %>%
group_by(DMDMARTL,RISK) %>%
summarize(count = n()) %>%
drop_na()
p <- ggplot(data = demo_trigly_marital, mapping = aes(x = reorder(DMDMARTL,-count), y = count)) +
geom_col(mapping = aes(fill = RISK)) +
coord_flip() +
scale_fill_manual(values = c("#FF3500", "#FF7D05", "#FFC300")) +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 1)
) +
labs(title = "Heart disease risk by marital status",
subtitle = "Risk based on LDL level",
x = "Marital status",
y = "Total patients",
fill = "Risk level")
# transition_reveal(count) +
# ease_aes('linear')
# anim_save("../shiny/www/gganimate01.gif", animate(p))
ggplotly(p)
Education level and heart disease risk
# table marital status and heart disease risk
table(demo_trigly$DMDHREDU,demo_trigly$RISK)
high low medium
1 6 236 112
2 7 219 133
3 12 428 236
4 16 611 306
5 13 474 254
9 1 6 2
Visualizing Heart disease risk by education level
demo_trigly_education <- demo_trigly %>%
filter(DMDHREDU <= 5)
p <- ggplot(data = demo_trigly_education, mapping = aes(x = DMDHREDU, y = LBDLDL)) +
geom_point(color = "#FFC300", alpha = 0.4) +
geom_smooth(size = 1.1, method = "loess", se = FALSE) +
scale_x_log10() +
theme(strip.background = element_rect(
fill="#FF7D05", size=1.5, linetype="solid"
),panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 1)
)+
facet_wrap(~RISK, ncol = 3) +
labs(x = "Education Level (1 - lowest, 5 - highest)",
y = "LDL Levels",
title = "Heart disease risk by education level",
subtitle = "Risk based on LDL level")
ggplotly(p)
Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
Removed 478 rows containing non-finite values (stat_smooth).pseudoinverse used at 0.70246neighborhood radius 0.22534reciprocal condition number 6.0207e-17There are other near singularities as well. 0.090619pseudoinverse used at 0.70246neighborhood radius 0.22534reciprocal condition number 7.8557e-17There are other near singularities as well. 0.090619pseudoinverse used at 0.70246neighborhood radius 0.22534reciprocal condition number 4.2613e-17There are other near singularities as well. 0.090619
Annual household income of patients in study
table(demo_trigly$INDHHIN2)
1 2 3 4 5 6 7 8 9 10 12 13 14 15 77 99
76 114 188 192 186 390 310 246 187 157 79 58 298 485 82 45
Visualizing Heart disease risk by Annual household income
p <- ggplot(data = demo_trigly, mapping = aes(x = INDHHIN2, fill = RISK)) +
geom_density(alpha = 0.7) +
scale_x_log10() +
scale_fill_manual(values = c("#FF3500", "#FFC300", "#FF7D05")) +
labs(x = "Annual Household Income (1 - lowest, 100 - highest)",
y = "Risk score",
title = "Heart disease risk by annual household income",
subtitle = "Risk based on LDL level")
ggplotly(p)
Removed 98 rows containing non-finite values (stat_density).
Visualizing National Center for Health Statistics region
# filtering Maryland region, where National Center for Health Statistics is located at
states <- map_data("state")
nchs_region <- states %>%
filter(region == 'maryland')
p <- ggplot() +
geom_polygon(data = states, aes(x=long,y=lat, group = group, fill=region),color="white", alpha = 0.3) +
geom_polygon(data = nchs_region, aes(x=long,y=lat, group = group, fill=region),color="white") +
labs(title = "National Center for Health Statistics - Maryland") +
theme_void()
guides(fill = FALSE)
$fill
[1] FALSE
attr(,"class")
[1] "guides"
ggplotly(p) %>% layout(showlegend = FALSE)
Visualizing National Center for Health Statistics region
# National Center for Health Statistics coordenates
m <- leaflet() %>%
addTiles() %>%
addMarkers(lng=-76.951720,
lat=38.969450,
label="National Center for Health Statistics",
labelOptions = labelOptions(noHide=T))
m